module sscluster
	use ssmean
	use hmdamod
	implicit none
	
	integer	:: clustertype=0
	logical, parameter		:: FE=.false.
	
	contains
	
	
	subroutine mkcombs(n,k,ac)
		integer		:: n,k
		integer		:: ncombs
		integer, allocatable	:: ac(:,:)
		integer		:: comb(k),i,c,j

		if(allocated(ac)) deallocate(ac)

		ncombs=k*gamma(n+1.0)/(gamma(k+1.0)*gamma(n-k+1.0))
		allocate(ac(k,ncombs ))
		comb=[(i,i=1,k)]
		c=1
		do 
			call mdisp(real(comb))
			do j=1,k
				ac(:,c)=[comb(j),comb(1:j-1),comb(j+1:)]
				c=c+1
			enddo
			if(comb_next(n,k,comb)) exit
		enddo
	end subroutine
	
	
	function comb_next (n, k, comb) result(done)
		integer	:: n,k,comb(k)
		logical	:: done
		integer	:: i,j

		done=.false.
		if ( comb(k) < n ) then
			comb(k) = comb(k) + 1
			return
		end if

		do i = k, 2, -1
			if ( comb(i-1) < n-k+i-1 ) then
			comb(i-1) = comb(i-1) + 1
			do j = i, k
			  comb(j) = comb(i-1) + j - ( i-1 )
			end do
			return
		  end if
		end do
		done = .true.
	end function

	
	function skew(x) result(val)
		real	:: x(:),val
		integer	:: n
		n=size(x)
		val=sum(x**3)/n
		val=val/(sum(x**2)/n)**1.5
	end function
	
	function ssclust_MC(rl) result(val)
		use rlse_int
		integer, parameter		:: nmethods=5
		real, allocatable		:: rl(:,:)
		logical					:: val

		real					:: rls(2,nmethods,nssim),tstats(nssim),xdata(n_sample)

		integer, parameter		:: nX=5, nopc=10
		real					:: X(n_sample*nopc,nX),R(n_sample*nopc),Y(n_sample*nopc),beta1(nX),betahat
		real					:: nus(n_sample)
		integer					:: ncs(n_sample)
		integer					:: l,i,snc,j
		
		ic=ic+1
		if(ic>7) then
			val=.false.
			return
		endif
		val=.true.
		ncs=nopc
!$omp parallel do private(X,R,Y,beta1,nus,i,snc,j,betahat,xdata)		
		do l=1,nssim
			call rnnoa(Y)
			do j=1,nX-1
				call rnnoa(X(:,j))
			enddo
			X(:,nX)=1
			X=matmul(X,choleski(invertpd(matmul(transpose(X),X))))
			call rnnoa(R)			
			nus=getrandomx(ic)
			snc=0
			do i=1,n_sample
				Y(snc+1:snc+ncs(i))=Y(snc+1:snc+ncs(i))+R(snc+1:snc+ncs(i))*nus(i)
				snc=snc+ncs(i)
			enddo
			call rlse(R,X,beta1,intcep=0)
			R=R-matmul(X,beta1)
			rls(:,1:3,l)=cluster_altmethods(Y,R,X,ncs,tstats(l))
			rls(:,4,l)=cluster_CGM(Y,R,X,ncs)
			call rlse(Y,X,beta1,intcep=0)
			Y=Y-matmul(X,beta1)
			betahat=sum(Y*R)/sum(R*R)
			Y=R*(Y-betahat*R)/(sum(R*R)/n_sample)
			snc=0
			do i=1,n_sample
				xdata(i)=sum(Y(snc+1:snc+ncs(i)))+betahat
				snc=snc+ncs(i)
			enddo
			rls(:,5,l)=getrl_new(xdata)
		enddo
		rl=setfinalrls(tstats,rls)
	end function
	
	
	function ssclust_data(rl) result(val)
		use rnsri_int
		use rnper_int
		use rnund_int
		use rlse_int
		use rcov_int
		integer, parameter		:: nmethods=5
		real, allocatable		:: rl(:,:)
		logical					:: val

		real					:: rls(2,nmethods,nssim),tstats(nssim),xdata(n_sample)
		integer, parameter		:: nX=5
		integer, parameter		:: nclustermax=1000, nopcmax=20000
		integer, save			:: ncluster, nopc(nclustermax), cids(nopcmax,nclustermax)
		real, allocatable, save	:: datamat(:,:)
		integer, save			:: ndata,npop
		
		integer, allocatable	:: clusterid(:)
		integer					:: clusterlist(nclustermax)

		integer					:: sids(300000),ns

		integer, allocatable	:: clusterid0(:)
		integer					:: l,i,j,ui(n_sample),ix(nX),jx(nX+1),c
		real, allocatable		:: X0(:,:),R0(:),Y0(:),X(:,:),Y(:),R(:),e(:)
		real					:: beta0(merge(nX,nX+1,FE)), beta1(merge(nX-1,nX,FE)), sse, betahat
		integer					:: ncs(n_sample)
		
		integer, allocatable, save	:: ac(:,:)
		real					:: u(1)

		if(ic==0) then
!	1	2			3		4	5		6		7			8			9		10		11		12		13		14		15		16		17			18		19
!year	intmonth	stated	age	female	weight	hispanic	nonwhite	wage	lwage	marr	marrfe	alloc	public	dind02	docc02	cbsafips	edcat3	uniona

			datamat=loadcsv("C:\dropbox\mystuff\heavymean\data\cps_Hank\cpshank.csv",.true.)
			where(datamat(:,17)==0)
				datamat(:,17)=datamat(:,3)
			endwhere
			clusterid=datamat(:,17) ! +datamat(:,15)
			if(clustertype==0) then
				datamat=datamat(:,[10,4,5,7,8,11,13,14,18,19])
				call mkcombs(9,nX,ac)
			else
				datamat=datamat(:,[19,4,5,7,8,11,13,14,18])
				call mkcombs(8,nX,ac)
			endif
			call mdisp(real(ac))
			print *,"number of combinations",size(ac,2)
			ndata=size(datamat,2)
			npop=size(datamat,1)
			print *,"ndata=",ndata			
			ncluster=0
			nopc=0
			do i=1,npop
				j=findloc(clusterlist(1:ncluster),clusterid(i),dim=1)
				if(j==0) then
					ncluster=ncluster+1
					if(ncluster>nclustermax) then
						print *,"ncluster overflow"
						stop
					endif
					clusterlist(ncluster)=clusterid(i)
					j=ncluster
				endif
				nopc(j)=nopc(j)+1
				cids(nopc(j),j)=i
			enddo
			print *,"number of clusters",ncluster
			call mdisp(-sort(real(-nopc(1:ncluster))))
			if(maxval(nopc)>nopcmax) then
				print *,"nopc overflow"
				stop
			endif
			if(FE) then
				do i=1,ncluster
					do j=1,ndata
						datamat(cids(1:nopc(i),i),j)=datamat(cids(1:nopc(i),i),j)-sum(datamat(cids(1:nopc(i),i),j))/nopc(i)
					enddo
				enddo
				c=0	
				do j=1,ncluster
					if(nopc(j)>1) then
						c=c+1
						nopc(c)=nopc(j)
						cids(:,c)=cids(:,j)
					endif
				enddo
				ncluster=c
				print *,"ncluster with more than one obs",ncluster
			endif
		endif
		ic=ic+1
		if(ic>200) then
			val=.false.
			return
		endif
		val=.true.
		print *,"entering X0 and Y0 generation"
		call rnun(u)
		ix=ac(:,size(ac,2)*u(1)+1)
		ix=ix+1
		call mdisp(real(ix))
		Y0=datamat(:,1)
		Y0=Y0/stddev(Y0)
		X0=datamat(:,ix)
		if(.not. FE) X0=reshape([X0,[(1.0,l=1,npop)]],[npop,nx+1])
		X0(:,1)=X0(:,1)/stddev(X0(:,1))
		call rlse(Y0,X0,beta0,intcep=0,sse=sse)
		sse=sse/npop
print *,"R2",1-sse
call mdisp(beta0)
		Y0=Y0-matmul(X0,beta0)
		call rlse(X0(:,1),X0(:,2:),beta1,intcep=0,sse=sse)
		sse=sse/npop
print *,"R2 of R0 on X0",1-sse			
		print *,"done generating X0 and Y0"
		cname="cluster"//convtos(ic)
		
!$omp parallel do private(ui,i,j,sids,ns,X,R,Y,e,ncs,betahat,xdata) schedule(dynamic)
		do l=1,nssim
			call rnund(ncluster,ui)
			ns=0
			do i=1,n_sample
				j=nopc(ui(i))
				sids(ns+1:ns+j)=cids(1:j,ui(i))
				ns=ns+j
				if(ns>size(sids)) then
					print *,"sids overflow"
					stop
				endif
			enddo
			X=X0(sids(1:ns),2:)
			X=matmul(X,choleski(invertpd(matmul(transpose(X),X))))
			R=X0(sids(1:ns),1)
			R=R-matmul(X,matmul(R,X))
			Y=Y0(sids(1:ns))
			e=Y-matmul(X,matmul(Y,X))
			do j=1,n_sample
				ncs(j)=nopc(ui(j))
			enddo
			rls(:,1:3,l)=cluster_altmethods(e,R,X,ncs,tstats(l))
			rls(:,4,l)=cluster_CGM(e,R,X,ncs)
			betahat=sum(e*R)/sum(R*R)
			Y=R*(e-betahat*R)/(sum(R*R)/n_sample)
			ns=0
			do i=1,n_sample
				j=nopc(ui(i))
				xdata(i)=sum(Y(ns+1:ns+j))+betahat
				ns=ns+j
			enddo
			rls(:,5,l)=getrl_new(xdata)
			deallocate(X,Y,R,e)
		enddo
		rl=setfinalrls(tstats,rls)
	end function

	function cluster_CGM(Y,R,X,ncs) result(val)
		real	:: Y(:),R(:),X(:,:),val(2)
		integer	:: ncs(:)
		real	:: Gas(size(ncs)),Z(size(Y)), XY(size(ncs)), XYstar(size(ncs))
		real	:: tstat,bootts(nboot),beta,s_beta
		real	:: betamin,betamax,delb,u(1),b0,b
		integer	:: i,nc,snc,j
			
		Z=R/sqrt(sum(R**2))
		snc=0
		do i=1,size(ncs)
			nc=ncs(i)
			Gas(i)=sum(Z(snc+1:snc+nc)**2)
			XY(i)=sum(Z(snc+1:snc+nc)*Y(snc+1:snc+nc))
			snc=snc+nc
		enddo
		beta=sum(XY)
		s_beta=sqrt(sum((XY-Gas*beta)**2))
		tstat=beta/s_beta
		
		do j=1,nboot
			XYstar=XY*urads(:,j)	
			b=sum(XYstar)
			bootts(j)=b/sqrt(sum((XYstar-Gas*b)**2))
		enddo
		val(1)=boole(count(tstat<bootts)<nboot*level/2 .or. count(tstat>bootts)<nboot*level/2)		
		
		betamin=beta-4*s_beta
		do
			if(reject(betamin)) exit
			betamin=betamin-s_beta
		enddo
		betamax=beta+4*s_beta
		do
			if(reject(betamax)) exit
			betamax=betamax+s_beta
		enddo
		delb=(betamax-betamin)/(nCIgrid-1)
		call rnun(u)
		betamin=betamin+delb*u(1)
		do
			if((.not. reject(betamin)) .or. betamin>=betamax ) exit
			betamin=betamin+delb
		enddo
		betamax=betamax+(1-u(1))*delb
		do
			if((.not. reject(betamax)) .or. betamin>=betamax ) exit
			betamax=betamax-delb
		enddo
		betamax=betamax+delb
		val(2)=(betamax-betamin)/sqrt(sum(R*R))
		
	contains 
		function reject(beta0) result(val)
			real	:: beta0
			logical	:: val
			real	:: XY0(size(ncs))
			integer	:: j,nrej
			real	:: bootts(nboot)
			
			tstat=(beta-beta0)/s_beta
			XY0=XY-beta0*Gas
		
			do j=1,nboot
				XYstar=XY0*urads(:,j)	
				b=sum(XYstar)
				bootts(j)=b/sqrt(sum((XYstar-Gas*b)**2))
			enddo
			nrej=count(tstat<bootts)
			val=boole(nrej<nboot*level/2 .or. nrej>nboot*(1-level/2))
		end function
	end function

	
	function cluster_altmethods(Y,R,X,ncs,tstat) result(val)
		real	:: Y(:),R(:),X(:,:),tstat,val(2,3)
		integer	:: ncs(:)
		integer	:: i,nc,snc,j
		real	:: Z(size(y),size(x,2)+1), ehat(size(Y)), s2, betahat, G(size(x,2)+1,size(ncs)), dGG(size(ncs)), KBM
		real, allocatable	:: mat(:,:),vNs(:),XspXs(:,:)
		real	:: eXs(size(x,2)+1,size(ncs)), evNs(size(ncs)), sumoeXs(size(x,2)+1,size(x,2)+1),rho,s2e
		integer	:: nreg,ncluster
		
		ncluster=size(ncs)
		betahat=sum(Y*R)/sum(R*R)
		ehat=Y-R*betahat
		s2=0
		snc=0
		do i=1,ncluster
			nc=ncs(i)
			s2=s2+sum(R(snc+1:snc+nc)*ehat(snc+1:snc+nc))**2
			snc=snc+nc
		enddo
		nreg=size(X,2)+1
		s2=((size(Y)-1.0)/(size(Y)-nreg))*(ncluster/(ncluster-1.0))*s2/sum(R*R)**2
		tstat=betahat/sqrt(s2)
		val(1,1)=rboole(betahat**2/s2>cvtstat**2)
		val(2,1)=2*cvtstat*sqrt(s2)

		if(all(ncs==1)) then
			val(:,2:3)=0
			return
		endif
		
		Z=reshape([R/sqrt(sum(R*R)),X],[size(y),nreg])
		betahat=sum(Z(:,1)*Y)
		rho=0
		s2=0
		snc=0
		do i=1,ncluster
			nc=ncs(i)
			XspXs=matmul(transpose(Z(snc+1:snc+nc,:)),Z(snc+1:snc+nc,:))
			vNs=Z(snc+1:snc+nc,1)+matmul(Z(snc+1:snc+nc,:),matmul(getmiddle05(XspXs),XspXs(:,1)))
			evNs(i)=sum(vNs)
			eXs(:,i)=sum(Z(snc+1:snc+nc,:),dim=1)
			dGG(i)=sum(vNs**2)
			G(:,i)=matmul(transpose(Z(snc+1:snc+nc,:)),vNs)
			s2=s2+sum(vNs*ehat(snc+1:snc+nc))**2
			rho=rho+sum(ehat(snc+1:snc+nc))**2
			snc=snc+nc
		enddo
		mat=-matmul(transpose(G),G)
		do i=1,ncluster
			mat(i,i)=mat(i,i)+dGG(i)
		enddo
		KBM=trace(mat)**2/trace(matmul(mat,mat))
		KBM=max(KBM,1.0)
		val(1,2)=betahat**2/s2>tin(1-level/2,KBM)**2
		val(2,2)=2*tin(1-level/2,KBM)*sqrt(s2)/sqrt(sum(R*R))
		
		if(all(ncs==1)) then
			val(:,3)=-1
			return
		endif
			
		
		s2e=sum(ehat**2)/size(y)
		rho=(rho-size(y)*s2e)/(sum(ncs**2)-size(y))
		
		mat=mat*(s2e-rho)/rho
		sumoeXs=matmul(eXs,transpose(eXs))
		do i=1,ncluster
			do j=1,ncluster
				mat(i,j)=mat(i,j)+(sum(G(:,i)*matmul(sumoeXs,G(:,j)))-evNs(i)*sum(eXs(:,i)*G(:,j))-evNs(j)*sum(eXs(:,j)*G(:,i)))
				if(i==j) mat(i,i)=mat(i,i)+evNs(i)**2
			enddo
		enddo
		KBM=trace(mat)**2/trace(matmul(mat,mat))
		KBM=max(KBM,1.0)
		val(1,3)=boole(betahat**2/s2>tin(1-level/2,KBM)**2)
		val(2,3)=2*tin(1-level/2,KBM)*sqrt(s2)/sqrt(sum(R*R))
	end function

	
	function getmiddle05(mat) result(val)
		use evcsf_int
		real	:: mat(:,:),val(size(mat,1),size(mat,1))
		real	:: evecs(size(mat,1),size(mat,1)),evals(size(mat,1)),evrs(size(mat,1),size(mat,1))
		integer	:: i
		call evcsf(mat,evals,evecs)
		do i=1,size(mat,1)
			if(evals(i)>1E-8) then
				evrs(:,i)=evecs(:,i)*((1.0-evals(i))**-0.5-1.0)/evals(i)
			else
				evrs(:,i)=0
			endif
		enddo
		val=matmul(evrs,transpose(evecs))
	end function
end module